# set working directory
setwd('~/replication_files')

# function to load and install packages
load_packages<-function(packages){
  sapply(packages,function(x){
    f<-require(x,character.only=T)
    if(!f) install.packages(x); f<-require(x,character.only=T)
    return(f)},USE.NAMES=T)
}

# load packages
load_packages(c('data.table','gtools','sandwich','miceadds'))

# format numbers to specific decimal places
specify_decimal<-function(x,ndec=3,drop0leading=F,drop0trailing=F,comma=F){
  f<-format(round(x,ndec),nsmall=ndec,drop0trailing=drop0trailing,scientific=F,big.mark=ifelse(comma,",",""))
  if(drop0leading){
    f<-sub("0.",".",f,fixed=T)
  }
  return(trimws(f))
}

# create stars depending on p-value
stars_pval<-function (p.value) {
  unclass(symnum(p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 1), symbols = c("***", "**", "*", "")))
}

# read data
(FinDT<-readRDS('Tables3A1.rds'))

# specify DVs
(DVs<-rep(c('provax_bin','provax_count','antivax_bin','antivax_count'),each=2))
# get number of models
(Nmods<-length(DVs))

# specify independent variables
(IVs_main<-rep(list(c('college','female','nonwhite','age30t44','age45t59','age60p','parent','caseid_file_date_N','log(caseid_N)'),
                    c('college','female','nonwhite','age30t44','age45t59','age60p','parent','caseid_file_date_N','log(caseid_N)','vaccine_scale')),4))
# add data name
(IVs_all<-lapply(IVs_main,function(x) c(x,'Data_Name')))

# create formulas
(formulas<-lapply(1:Nmods,function(x) as.formula(paste0(DVs[x],'~',paste(IVs_all[[x]],collapse='+')))))

# not sure why but running lm.cluster first outside of lapply seems to be necessary to get it to run inside lapply:
lm.cluster(data=FinDT,formula=formulas[[1]],cluster='resp_id')

# run models and combine into a list
modlist<-lapply(1:Nmods,function(x){
  # run linear model clustered on respondent
  mod<-miceadds::lm.cluster(data=FinDT,formula=formulas[[x]],cluster='resp_id')
  # get summary
  modsum<-summary(mod)
  # extract coefficients
  (coefs<-modsum[c(IVs_main[[x]],'(Intercept)'),])
  
  # get r^2
  (R2<-specify_decimal(summary(mod$lm_res)$r.squared,3))
  # get N
  (N<-paste0('\\multicolumn{1}{l}{',formatC(length(summary(mod$lm_res)$residuals),big.mark=','),'}'))
  # reformat estimates
  (Est<-specify_decimal(coefs[,1],3))
  # reformat standard errors
  (SE<-paste0('(',specify_decimal(coefs[,2],3),')'))
  # convert p value to a star
  (p<-stars_pval(coefs[,4]))
  # add star to coefficient
  (Estp<-paste0(Est,'{$^{',p,'}$}'))
  # create data.table
  (dt<-data.table(rn=row.names(coefs),Estp=Estp,SE=SE))
  # rename columns
  setnames(dt,2:3,paste0(c('Estp','SE'),x))
  # return a list with the data.table, R^2, and N
  list(dt=dt,R2=R2,N=N)
})

# extract data.tables from model list
(dtlist<-lapply(modlist,`[[`,1))

# merge the data.tables together
(dtmerge<-Reduce(function(x,y) merge(x,y,by='rn',all=T),dtlist))
# create an ordering data.table
(dtorder<-data.table(rn=c('college','female','nonwhite','age30t44','age45t59','age60p','parent','caseid_file_date_N','log(caseid_N)','vaccine_scale','(Intercept)')))
set(dtorder,NULL,'order',1:nrow(dtorder))

# merge model results with ordering data.table
(dtmerge<-merge(dtmerge,dtorder,by='rn'))
# reorder rows
setorder(dtmerge,order)

# get names of columns that need fixed
(cols2fix<-setdiff(names(dtmerge),c('rn','order')))
# for each column replace NA with an empty string
for(x in cols2fix){
  set(dtmerge,dtmerge[,.I[is.na(get(x))]],which(names(dtmerge)==x),'')
}

# start transforming into a Latex table
(tabMain<-lapply(1:Nmods,function(x) c(rbind(dtmerge[[paste0('Estp',x)]],dtmerge[[paste0('SE',x)]]))))
(tabMain<-do.call(cbind,tabMain))
(tabMain<-apply(tabMain,1,function(x) paste(x,collapse=' & ')))

# coefficient names for table
tabCoefs<-c('College','~graduate','Female','','Nonwhite','','Age 30--44','','Age 45--59','','Age 60+','','Parent','','Days in','~panel','log(URLs','~visited)','Vaccine','~favorability','Constant','')

# add coefficients
(tabMain<-paste(tabCoefs,tabMain,sep=' & '))
# add line break at end
(tabMain<-paste0(tabMain,' \\\\'))

# extract r squareds
(modR2s<-lapply(modlist,`[[`,2))
# extract model Ns
(modNs<-lapply(modlist,`[[`,3))

# create a bottom table that has this additional model information
(tabBottom<-cbind(c('$R^2$','$N$'),rbind(unlist(modR2s),unlist(modNs))))
(tabBottom<-apply(tabBottom,1,function(x) paste(x,collapse=' & ')))
(tabBottom<-paste(tabBottom,'\\\\'))

# put the table strings together
(tabContent<-c(tabMain,'\\midrule',tabBottom))

# top of latex table
tab_head<-c('\\begin{table}[!p]',
            '\\caption{Correlates of exposure to online vaccine-related information (unweighted)}',
            '\\begin{center}',
            '\\begin{threeparttable}',
            '\\begin{tabular}{l *{8}{S[table-format=1.3,table-space-text-post={$^{***}$}]} }',
            '\\toprule',
            '& \\multicolumn{4}{c}{\\myuline{Non-skeptical webpages}} & \\multicolumn{4}{c}{\\myuline{Vaccine-skeptical webpages}} \\\\',
            '& \\multicolumn{2}{c}{Visited $\\geq 1$ page} & \\multicolumn{2}{c}{Total pages visited} & \\multicolumn{2}{c}{Visited $\\geq 1$ page} & \\multicolumn{2}{c}{Total pages visited} \\\\',
            '\\midrule')

# notes for latex table
tab_notes<-paste('\\item \\leavevmode\\kern-\\scriptspace\\kern-\\labelsep',
                 '$^{*} p < 0.05$, $^{**} p < .01$, $^{***} p<.001$ (two-sided); OLS models with standard errors clustered by respondent. All models include panel fixed effects. Data from YouGov Pulse panel participants with online traffic data available. Each observation represents behavioral web traffic data associated with responses to one of seven national surveys conducted from 2016--2019. Vaccine attitudes data were collected in prior surveys among a subset of respondents.')

# bottom of latex table
tab_foot<-c('\\bottomrule',
            '\\end{tabular}',
            '\\begin{tablenotes}[flushleft]',
            tab_notes,
            '\\end{tablenotes}',
            '\\end{threeparttable}',
            '\\end{center}',
            '\\label{table:taba1}',
            '\\end{table}')

# put latex strings together
(tab_fin<-c(tab_head,tabContent,tab_foot))

# save the table
writeLines(tab_fin,'TableA1.tex')
